0. Preparation

library(tidyverse)
## ── Attaching packages ───────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2     ✓ purrr   0.3.4
## ✓ tibble  3.0.3     ✓ dplyr   1.0.2
## ✓ tidyr   1.1.2     ✓ stringr 1.4.0
## ✓ readr   1.3.1     ✓ forcats 0.5.0
## ── Conflicts ──────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
# setwd("/Volumes/Long_long/DAEN690Dragon/YES_CSV")

# Read one summary CSV and multiply its volume_of_ISSR column by 100.
# The plots further down label this quantity with "(%)", so the raw file
# presumably stores it as a fraction -- TODO confirm against the CSVs.
read_scaled_summary <- function(csv_path) {
  raw <- read.csv(csv_path)
  mutate(raw, volume_of_ISSR = volume_of_ISSR * 100)
}

# Summary table used by the "3D Grid" section; printed for inspection.
overall_summary <- read_scaled_summary("overall_summary.csv")
overall_summary

# Summary table used by the "Layers/Horizontal Grids" section.
layer_summary <- read_scaled_summary("layer_summary.csv")
layer_summary

1. Statistical Analysis for the 3D Grid

1.1. Calculate hourly/daily percentage change

# Percentage change of num_of_ISSR against the previous row ("hourly") and
# against the row 24 positions earlier ("daily"). Both lags are positional,
# so this presumes exactly one row per hour in time order -- TODO confirm.
# Rows with no value at the requested lag get NA (dplyr::lag's padding).
# Both new columns are placed immediately before the `ceiling` column.
overall_summary <- overall_summary %>%
  mutate(
    hourly_pct_change = (num_of_ISSR / lag(num_of_ISSR, 1) - 1) * 100,
    daily_pct_change = (num_of_ISSR / lag(num_of_ISSR, 24) - 1) * 100
  ) %>%
  relocate(hourly_pct_change, daily_pct_change, .before = ceiling)
overall_summary

1.2. Trend of volume_of_ISSR

# Hourly volume_of_ISSR drawn as one bar per datetime_id, filled by month.
# The month is decoded arithmetically from `date`: dropping the last two
# digits and keeping the next two, which presumes a numeric YYYYMMDD
# encoding -- TODO confirm.
task2 <- overall_summary %>%
  mutate(month = factor((date %/% 100) %% 100))

# x tick labels and marks are blanked out.
ggplot(task2, aes(x = datetime_id, y = volume_of_ISSR, fill = month)) +
  geom_col() +
  labs(
    title = "Trend of hourly updated volume_of_ISSR",
    x = "Hourly Datetime",
    y = "volume_of_ISSR (%)"
  ) +
  theme(axis.text.x = element_blank(), axis.ticks.x = element_blank())

# Daily average of volume_of_ISSR: the day's total divided by a fixed 24,
# so a day with fewer than 24 rows would be understated.
# pct_change compares each day with the previous row of the daily table.
# `month` is derived from the numeric date before `date` is turned into a
# factor (needed so each day becomes its own discrete bar).
task3 <- overall_summary %>%
  group_by(date_id, date) %>%
  summarise(avg_volume_of_ISSR = sum(volume_of_ISSR) / 24) %>%
  ungroup() %>%
  mutate(
    pct_change = (avg_volume_of_ISSR / lag(avg_volume_of_ISSR, 1) - 1) * 100,
    month = factor((date %/% 100) %% 100),
    date = factor(date)
  )
## `summarise()` regrouping output by 'date_id' (override with `.groups` argument)
task3

# One bar per day, filled by month; x tick labels and marks are blanked out.
ggplot(task3, aes(x = date, y = avg_volume_of_ISSR, fill = month)) +
  geom_col() +
  labs(
    title = "Trend of daily average volume_of_ISSR",
    x = "Dates",
    y = "avg_volume_of_ISSR (%)"
  ) +
  theme(axis.text.x = element_blank(), axis.ticks.x = element_blank())

# Lookup table: number of days used as the divisor for each month.
# NOTE(review): May is given 15 days rather than 31 -- presumably the data
# set starts mid-May; confirm against the input files.
month_days_df <- data.frame(
  month = as.numeric(5:12),
  month_days = c(15, 30, 31, 31, 30, 31, 30, 31)
)

# Monthly average of volume_of_ISSR: the month's total divided by 24 and by
# the month's day count. month_days is constant within a month after the
# join, so min() just collapses it to a single value.
task5 <- overall_summary %>%
  mutate(month = (date %/% 100) %% 100) %>%
  left_join(month_days_df, by = "month") %>%
  group_by(month) %>%
  summarise(avg_volume_of_ISSR = sum(volume_of_ISSR) / 24 / min(month_days)) %>%
  mutate(month = factor(month))
## `summarise()` ungrouping output (override with `.groups` argument)
task5

# One bar per month, with the rounded value printed above each bar.
ggplot(task5, aes(x = month, y = avg_volume_of_ISSR, fill = month)) +
  geom_col() +
  geom_text(
    aes(label = round(avg_volume_of_ISSR, 4)),
    vjust = -1.1,
    size = 3,
    show.legend = FALSE
  ) +
  labs(
    title = "Monthly average volume_of_ISSR",
    x = "Months",
    y = "avg_volume_of_ISSR (%)"
  )

1.3. Midnight and Noon

# Keep only the rows whose hour is 0 or 12 and tag each with its month
# (decoded from the numeric date; presumes YYYYMMDD -- TODO confirm).
midnight_noon <- overall_summary %>%
  filter(hour %in% c(0, 12)) %>%
  transmute(
    datetime_id, date_id, date, hour, volume_of_ISSR,
    month = (date %/% 100) %% 100
  )
midnight_noon

# hour becomes a factor so the two hours get discrete line colours.
midnight_noon$hour <- factor(midnight_noon$hour)

# One panel per month (2 x 4 grid), each with its own x range.
ggplot(midnight_noon, aes(x = date, y = volume_of_ISSR, colour = hour)) +
  geom_line() +
  facet_wrap(~ month, nrow = 2, ncol = 4, scales = "free_x") +
  labs(
    title = "Volume_of_ISSR at midnight and noon by month",
    x = "Dates",
    y = "volume_of_ISSR (%)"
  ) +
  theme(axis.text.x = element_text(angle = 30))

# Lookup table: number of days used as the divisor for each month.
# NOTE(review): May is given 15 days rather than 31 -- presumably the data
# set starts mid-May; confirm against the input files.
month_days_df <- data.frame(
  month = c(5, 6, 7, 8, 9, 10, 11, 12),
  month_days = c(15, 30, 31, 31, 30, 31, 30, 31)
)

# Monthly average of volume_of_ISSR at hour 0 and hour 12.
# month_days has to be collapsed to one value per group (min(), as in the
# other monthly summaries of this file): dividing by the whole column makes
# summarise() return one row per input row, i.e. the same average repeated
# once for every day, which overplots every line, point and label.
# `.groups = "drop"` returns a plain ungrouped table, one row per
# (month, hour).
midnight_noon <- overall_summary %>%
  select(datetime_id, date_id, date, hour, volume_of_ISSR) %>%
  mutate(month = floor(date / 100) %% 100) %>%
  left_join(month_days_df, by = "month") %>%
  filter(hour == 0 | hour == 12) %>%
  group_by(month, hour) %>%
  summarise(
    avg_volume_of_ISSR = sum(volume_of_ISSR) / min(month_days),
    .groups = "drop"
  )
midnight_noon

# hour becomes a factor so the two hours get discrete line colours.
midnight_noon$hour <- factor(midnight_noon$hour)

# One line per hour across months, with the rounded value above each point.
ggplot(midnight_noon, aes(x = month, y = avg_volume_of_ISSR, col = hour)) +
  geom_line() +
  geom_point() +
  ylab("Monthly average volume_of_ISSR (%)") + xlab("Months") +
  ggtitle("Monthly average volume_of_ISSR at midnight and noon") +
  geom_text(aes(label = round(avg_volume_of_ISSR, 4)),
            vjust = -1.1, size = 3,
            show.legend = FALSE)

# Lookup table: number of days used as the divisor for each month.
# NOTE(review): May is given 15 days rather than 31 -- presumably the data
# set starts mid-May; confirm against the input files.
month_days_df <- data.frame(
  month = c(5, 6, 7, 8, 9, 10, 11, 12),
  month_days = c(15, 30, 31, 31, 30, 31, 30, 31)
)

# Monthly average of volume_of_ISSR for every hour of the day. The result
# reuses the name `midnight_noon` even though no hour filter is applied.
# month_days has to be collapsed to one value per group (min(), as in the
# other monthly summaries of this file): dividing by the whole column makes
# summarise() return one row per input row, i.e. the same average repeated
# once for every day. `.groups = "drop"` returns a plain ungrouped table,
# one row per (month, hour).
midnight_noon <- overall_summary %>%
  select(datetime_id, date_id, date, hour, volume_of_ISSR) %>%
  mutate(month = floor(date / 100) %% 100) %>%
  left_join(month_days_df, by = "month") %>%
  group_by(month, hour) %>%
  summarise(
    avg_volume_of_ISSR = sum(volume_of_ISSR) / min(month_days),
    .groups = "drop"
  )
midnight_noon

# hour becomes a factor so each hour gets its own discrete line colour.
midnight_noon$hour <- factor(midnight_noon$hour)

# One line per hour across months.
ggplot(midnight_noon, aes(x = month, y = avg_volume_of_ISSR, col = hour)) +
  geom_line() +
  ylab("Monthly average volume_of_ISSR (%)") + xlab("Months") +
  ggtitle("Monthly average volume_of_ISSR at each hour")

1.4. Ceilings and floors by months

# Long-format table with one row per (datetime, type), where type is
# "ceiling" or "floor" and pressure_level holds the corresponding value.
# NOTE(review): the rename() swaps the two column labels, so values read
# from the input's `floor` column are reported as "ceiling" and vice versa.
# Presumably the input names them by pressure magnitude rather than by
# altitude -- confirm before relying on the labels.
ceiling_floor <- overall_summary %>%
  select(datetime_id, date_id, date, hour, ceiling, floor) %>%
  rename(ceiling = floor, floor = ceiling) %>%
  pivot_longer(
    cols = c(ceiling, floor),
    names_to = "type",
    values_to = "pressure_level"
  )
ceiling_floor

# y axis is reversed so smaller pressure values are drawn higher up.
ggplot(ceiling_floor, aes(x = datetime_id, y = pressure_level, colour = type)) +
  geom_line() +
  scale_y_reverse() +
  labs(
    title = "Ceilings and Floors at each datetime",
    x = "Hourly Datetimes",
    y = "Pressure Levels (Pa)"
  )

# Daily average ceiling/floor pressure level: each day's total per type
# divided by a fixed 24, then tagged with the month decoded from the
# numeric date (presumes YYYYMMDD -- TODO confirm).
ceiling_floor2 <- ceiling_floor %>%
  group_by(date, type) %>%
  summarise(avg_pressure_level = sum(pressure_level) / 24) %>%
  ungroup() %>%
  mutate(month = (date %/% 100) %% 100)
## `summarise()` regrouping output by 'date' (override with `.groups` argument)
ceiling_floor2

# One panel per month (2 x 4 grid) with its own x range; y axis reversed so
# smaller pressure values are drawn higher up.
ggplot(ceiling_floor2, aes(x = date, y = avg_pressure_level, colour = type)) +
  geom_line() +
  facet_wrap(~ month, nrow = 2, ncol = 4, scales = "free_x") +
  scale_y_reverse() +
  labs(
    title = "Daily average ceilings and floors by month",
    x = "Dates",
    y = "Daily Average Pressure Levels (Pa)"
  ) +
  theme(axis.text.x = element_text(angle = 30))

# Lookup table: number of days used as the divisor for each month.
# NOTE(review): May is given 15 days rather than 31 -- presumably the data
# set starts mid-May; confirm against the input files.
month_days_df <- data.frame(
  month = as.numeric(5:12),
  month_days = c(15, 30, 31, 31, 30, 31, 30, 31)
)

# Monthly average ceiling/floor pressure level: the month's total per type
# divided by 24 and by the month's day count. month_days is constant within
# a month after the join, so min() just collapses it to a single value.
ceiling_floor3 <- ceiling_floor %>%
  mutate(month = (date %/% 100) %% 100) %>%
  left_join(month_days_df, by = "month") %>%
  group_by(month, type) %>%
  summarise(avg_pressure_level = sum(pressure_level) / 24 / min(month_days))
## `summarise()` regrouping output by 'month' (override with `.groups` argument)
ceiling_floor3

# One line per type across months, labelled with the value rounded to a
# whole number; y axis reversed so smaller pressure values are drawn higher.
ggplot(ceiling_floor3, aes(x = month, y = avg_pressure_level, colour = type)) +
  geom_line() +
  geom_point() +
  geom_text(
    aes(label = round(avg_pressure_level)),
    vjust = 1.5,
    hjust = "inward",
    size = 3,
    show.legend = FALSE
  ) +
  scale_y_reverse() +
  labs(
    title = "Monthly average ceilings and floors",
    x = "Month",
    y = "Monthly Average Pressure Levels (Pa)"
  )

2. Statistical Analysis for Layers/Horizontal Grids

# Print the per-layer summary table (loaded from layer_summary.csv) for inspection.
layer_summary

2.1. Volume of ISSRs (%) at different pressure levels by months

# Lookup table: number of days used as the divisor for each month.
# NOTE(review): May is given 15 days rather than 31 -- presumably the data
# set starts mid-May; confirm against the input files.
month_days_df <- data.frame(
  month = as.numeric(5:12),
  month_days = c(15, 30, 31, 31, 30, 31, 30, 31)
)

# Monthly average volume_of_ISSR per pressure level: each (month, level)
# total divided by the month's day count and by 24, sorted by pressure level.
# month_days is constant within a month after the join, so min() just
# collapses it to a single value.
task4 <- layer_summary %>%
  mutate(month = (date %/% 100) %% 100) %>%
  left_join(month_days_df, by = "month") %>%
  group_by(month, pressure_level) %>%
  summarise(avg_volume_of_ISSR = sum(volume_of_ISSR) / min(month_days) / 24) %>%
  arrange(pressure_level)
## `summarise()` regrouping output by 'month' (override with `.groups` argument)

# Both keys become factors so they act as discrete axis / fill categories.
# Done by column assignment because `month` is still a grouping column here.
task4$pressure_level <- factor(task4$pressure_level)
task4$month <- factor(task4$month)
task4

# Bars grouped by pressure level, one dodged bar per month.
ggplot(task4, aes(x = pressure_level, y = avg_volume_of_ISSR, fill = month)) +
  geom_col(position = "dodge") +
  ggtitle("Volume of ISSRs in different months by pressure levels") +
  xlab("Pressure Levels (Pa)") +
  ylab("Volume of ISSRs (%)") +
  theme(
    axis.text.x = element_text(angle = 45, face = "bold"),
    axis.text.y = element_text(face = "bold"),
    plot.title = element_text(size = 15, face = "bold.italic"),
    axis.title.x = element_text(face = "bold"),
    axis.title.y = element_text(face = "bold")
  )

# Same monthly averages as the previous plot, but grouped by month on the
# x axis with one dodged bar per pressure level (shown in the fill legend).
# The x axis therefore carries months, and is labelled accordingly.
ggplot(data = task4, mapping = aes(x = month, y = avg_volume_of_ISSR, fill = pressure_level)) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(title = "Volume of ISSRs in different months by pressure levels",
       x = "Months",
       y = "Volume of ISSRs (%)") +
  theme(axis.text.x = element_text(face = "bold"),
        axis.text.y = element_text(face = "bold"),
        plot.title = element_text(size = 15, face = "bold.italic"),
        axis.title.x = element_text(face = "bold"),
        axis.title.y = element_text(face = "bold"))